# Folder pathsinput_path <-"data/input_data/LDH/"metadata_path <-"data/metadata/LDH"# Create subfolders for output filesdataframes_folder <-"data/dataframes"if (!file.exists("data/dataframes")) {dir.create("data/dataframes", recursive =TRUE)}# Load data and metadatainput_data <-read_excel(file.path(input_path, "LAK24_0118_LDHData.xlsx")) %>%mutate_if(is.character, factor)Bac_order <-read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))
Data clean-up
# Filter only samples matched to CFU wellsLDH_data <- input_data %>%filter(CFUmatched =="T")# Factor Ordering and StylingLDH_data <-merge(LDH_data, Bac_order, by ="Species") LDH_data$bacteria_label <-factor(LDH_data$bacteria_label, levels = Bac_order$bacteria_label)LDH_data$Line <-fct_recode(LDH_data$Line, "HNO918"="A", "HNO204"="B", "HNO919"="C") # Average technical replicates for each individual experimentLDH_avg <- LDH_data %>%group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint, Collection_Time) %>%summarise(avg_Value =mean(Value)) %>%# Add labels to distinguish final collection times from time 0mutate(Collection_Label =if_else(Collection_Time ==24| Collection_Time ==48, "final", "initial"))# Calculate log2 fold change between final time point vs. time 0LDH_FC <- LDH_avg %>%group_by(Date, Line, Species, bacteria_label, Temp, Well_Endpoint) %>%reframe(FC = avg_Value[Collection_Label =="final"]/avg_Value[Collection_Label =="initial"])
Saving files
# Save data frames as CSV files in the output folderwrite_csv(LDH_data, file.path(dataframes_folder, "LDH_values.csv"))write_csv(LDH_FC, file.path(dataframes_folder, "LDH_FC.csv"))# Save data frames as R objects in the output foldersaveRDS(LDH_data, file.path(dataframes_folder, "LDH_values.rds"))saveRDS(LDH_FC, file.path(dataframes_folder, "LDH_FC.rds"))# Cleaning-up all objects from the environmentrm(list =ls())# Use this to read the final objectsLDH_data <-readRDS("data/dataframes/LDH_values.rds")LDH_FC <-readRDS("data/dataframes/LDH_FC.rds")
Stats and Plots
File Paths
# Folder pathsdataframes_path <-"data/dataframes"metadata_path <-"data/metadata/LDH"# Create subfolders for output filesfigures_folder <-"data/outputs/LDH/figures"if (!file.exists("data/outputs/LDH/figures")) {dir.create("data/outputs/LDH/figures", recursive =TRUE)}stats_folder <-"data/outputs/LDH/stats"if (!file.exists("data/outputs/LDH/stats")) {dir.create("data/outputs/LDH/stats", recursive =TRUE)}# Load data and metadataLDH_FC <-readRDS("data/dataframes/LDH_FC.rds")Bac_order <-read_csv(file.path(metadata_path, "Order_Bacteria_LDH.csv"))
Function for each temp condition and endpoint
# Function to analyze each temp conditionanalysis_function <-function(data, each_temp, each_endpoint, cutoff_pvalue, cutoff_FC) {# Subset the data to the selected temp data_subset <- data %>%filter(Temp == each_temp) %>%filter(Well_Endpoint == each_endpoint)# Mixed-effects model with random effects model <-lmer(FC ~ Species + (1|Line) + (1|Line:Date), data = data_subset)#Anova anova <-anova(model) anova_df <-as.data.frame(anova) %>%mutate(sign =case_when(`Pr(>F)`< cutoff_pvalue ~"*",TRUE~"")) %>%mutate_if(is.numeric, ~format(., digits =2, scientific =TRUE))# Calculate Individual contrasts emmeans_model <-emmeans(model, ~ Species) emmeans_contrasts <-pairs(emmeans_model, adjust ="none") # Extract random effects and convert to dataframe (if not singular) random_effects_df <-as.data.frame(VarCorr(model)) %>%mutate(proportion =round(100* (vcov /sum(vcov)), 2)) # Adds predictions based on fixed effects, averaged over random effects. It gives a population estimate data_subset <-cbind(data_subset, predval =predict(model,re.form =NA, se.fit =TRUE)) data_summary_df <- data_subset %>%group_by(Species, bacteria_label) %>%summarize(mean.real =mean(FC),mean.predval =mean(predval.fit), mean.predval.se =mean(predval.se.fit)) %>%mutate(max = mean.predval +2*mean.predval.se,min = mean.predval -2*mean.predval.se)# Convert contrasts to dataframe and adjust pvalues. Filter contrast to Uncolonized only contrasts_df <-as.data.frame(summary(emmeans_contrasts)) %>%filter(str_detect(contrast, "Uncolonized")) %>%mutate(p.adj.holm =p.adjust(p.value, method ="holm")) %>%mutate(sign =case_when( p.adj.holm <0.05~"*",TRUE~""))# Edits to the contrast dataframe to include pvalue brackets in plot contrasts_df <- contrasts_df %>%separate(contrast, into =c("condition1", "condition2"), sep =" - ", remove = F) contrasts_df$group1 <- Bac_order$bacteria_label[match(contrasts_df$condition1, Bac_order$Species)] contrasts_df$group2 <- Bac_order$bacteria_label[match(contrasts_df$condition2, Bac_order$Species)]# Calculate fold-change values for each contrast contrasts_df <- contrasts_df %>%ungroup() %>%left_join(select(data_summary_df, Species, mean.predval), by =join_by(condition1 == Species)) %>%left_join(select(data_summary_df, Species, mean.predval), by =join_by(condition2 == Species), suffix =c(".1", ".2")) %>%mutate(FC = mean.predval.1/ mean.predval.2,FC =if_else(FC <1, -1/ FC, FC),highlighted =case_when( FC <=-cutoff_FC ~"-", FC >= cutoff_FC ~"+",TRUE~"")) # Select p values to plot and define their location contrast_sign <- contrasts_df %>%filter(sign !=""& highlighted !="") %>%mutate(p.adj.holm =format(p.adj.holm, digits =2, scientific =TRUE)) location <-max(data_subset$FC, na.rm =TRUE) *1.1# Standard Boxplot plot_1 <-ggplot() +geom_boxplot(data = data_subset, aes(x = bacteria_label, y = FC, fill = bacteria_label)) +geom_jitter(data = data_subset, aes(x = bacteria_label, y = FC, shape = Line), fill ="grey50", color ="grey30", size =2, width =0.05, stroke =0.75) +scale_fill_manual(values =c("grey80","#AA35E3","#2e67f2","#927ed1")) +scale_shape_manual(values =c(21, 22, 24)) +labs(x ="",y ="Fold change in LDH from -1 hour") +theme_bw() +theme(panel.grid =element_blank(),legend.position ="none",text =element_text(size =20), axis.text.x =element_markdown(), axis.text.y =element_text(color ="black"))# Plot with predicted means and standard errors of the estimates plot_2 <-ggplot() +geom_point(data = data_subset, aes(x = bacteria_label, y = FC, fill = bacteria_label, color = bacteria_label, shape = Line), position =position_jitterdodge(dodge.width =0.7, jitter.width =0.2),size =1.5, alpha =0.75, stroke =0.75) +geom_point(data = data_summary_df, aes(x = bacteria_label, y = mean.predval), shape =3, size =3) +geom_errorbar(data = data_summary_df, aes(x = bacteria_label,y = mean.predval,ymax = max,ymin = min),width =0.4) +scale_fill_manual(values =c("#5b5b5b","#AA35E3","#2e67f2","#927ed1")) +scale_color_manual(values =c("#5b5b5b","#AA35E3","#2e67f2","#927ed1")) +scale_shape_manual(values =c(21, 22, 24)) +scale_y_continuous(expand =c(0.1,0)) +labs(x ="",y ="Fold change in LDH from -1 hour",fill ="Bacteria", color ="Bacteria", shape ="HNO Line") +theme_bw() +theme(panel.grid =element_blank(), legend.text =element_markdown(),text =element_text(size =20), axis.text.x =element_markdown(), axis.text.y =element_text(color ="black"))# Conditionally add p-value annotations layerif (nrow(contrast_sign) >0) { plot_2 <- plot_2 +stat_pvalue_manual(contrast_sign, label ="p.adj.holm", y.position = location,tip.length =0.02, bracket.shorten =0.2, vjust =-0.2, bracket.size =0.3, size =3.5) } else { plot_2 <- plot_2 }# Arrange plot and tables for summary pdf table <- contrasts_df %>%select(condition1, condition2, p.adj.holm, sign, mean.predval.1, mean.predval.2, FC, highlighted) %>%mutate(p.adj.holm =format(p.adj.holm, digits =2, scientific =TRUE)) Tmin <-ttheme_minimal() panel <-ggarrange(plot_1 +theme(plot.margin =unit(c(0.25,0.25,0.25,0.25), "in")), plot_2 +theme(plot.margin =unit(c(0.25,0.25,0.25,0.25), "in")),tableGrob(anova_df, theme = Tmin), tableGrob(random_effects_df, theme = Tmin, rows =NULL), tableGrob(table, theme = Tmin, rows =NULL), ncol =1, heights =c(0.6, 0.6, 0.1, 0.2, 0.2),labels =c(" Standard Boxplot ", "Predicted Mean ± 2*SE", " Anova ", "Random Effects", " Contrasts ")) panel <-annotate_figure(panel, top =text_grob(paste0("Analysis for ", each_temp, "C timepoint ", each_endpoint, " h. P-value: ", cutoff_pvalue, " and FC: ", cutoff_FC),face ="bold", size =14, color ="red"))# Save filesggsave(panel, filename =paste0(figures_folder, "/summaryLDH_", each_temp, "C_", each_endpoint, "h.pdf"), width =9, height =14)ggsave(plot_2, filename =paste0(figures_folder, "/plotLDH_", each_temp, "C_", each_endpoint, "h.png"), width =7, height =6)saveRDS(plot_2, file.path(figures_folder, paste0("plotLDH_", each_temp, "C_", each_endpoint, "h.rds")))write_csv(anova_df, file.path(stats_folder, paste0("anova_", each_temp, "C_", each_endpoint, "h.csv")))write_csv(random_effects_df, file.path(stats_folder, paste0("stats_random_effects_", each_temp, "C_", each_endpoint, "h.csv")))write_csv(contrasts_df, file.path(stats_folder, paste0("stats_contrasts_", each_temp, "C_", each_endpoint, "h.csv")))write_csv(data_summary_df, file.path(stats_folder, paste0("stats_summary_", each_temp, "C_", each_endpoint, "h.csv")))return(list(anova = anova_df,random_effects = random_effects_df,contrasts = contrasts_df,data_summary = data_summary_df,data_subset = data_subset,plot_1 = plot_1,plot_2 = plot_2,panel = panel ))}